# encoding:utf-8
# user: ares at 18-9-19

import scrapy
from scrapy.http import Request
from ershoufang.items import ErshoufangItem
from bs4 import BeautifulSoup
import re
from ershoufang.public import SpiderPublic as sp

class LianjiaSpider(scrapy.Spider):
    """Spider for second-hand housing listings on sz.lianjia.com.

    ``start_requests`` requests index pages 1..100, ``parse`` follows each
    listing's title link, and ``get_detailData`` turns one detail page into
    an ``ErshoufangItem``.
    """

    name = 'lianjia'
    allowed_domains = ['sz.lianjia.com']

    # Root of the listing index; page N is requested as ``<bash_url>pgN``.
    bash_url = 'https://sz.lianjia.com/ershoufang/'

    # Number in front of the unit suffix, e.g.
    # <span class="unitPriceValue">12345<i>...</i></span>
    _UNIT_PRICE_RE = re.compile(
        r'<span class="unitPriceValue">(.*?)<i>.*</i></span>')
    # One basic-attribute row, e.g.
    # <li><span class="label">房屋户型</span>5室2厅1厨2卫</li>
    _BASE_ATTR_RE = re.compile(
        r'<li><span class="label">(.*?)</span>(.*?)</li>')

    def start_requests(self):
        """Yield one request per listing index page (pg1 .. pg100)."""
        for page in range(1, 101):
            yield Request(self.bash_url + 'pg' + str(page),
                          callback=self.parse)

    def parse(self, response):
        """Yield a detail-page request for every listing card on an index page.

        The listing title and detail URL are forwarded through
        ``Request.meta`` under the keys ``title`` and ``url``.
        """
        soup = BeautifulSoup(response.text, 'html.parser')

        for data in soup.find_all('div', class_='info clear'):
            title_div = data.find('div', class_='title')
            link = title_div.find('a') if title_div is not None else None
            # Skip a malformed card instead of raising inside the generator,
            # which would lose every remaining card on this page.
            if link is None or not link.get('href'):
                continue

            # urljoin leaves absolute URLs untouched and resolves relative
            # ones against the index page URL.
            d_url = response.urljoin(link['href'])
            title = link.text.strip()
            yield Request(d_url, callback=self.get_detailData, meta={
                'title': title,
                'url': d_url
            })

    def get_detailData(self, response):
        """Build an ``ErshoufangItem`` from a listing detail page.

        Reads ``response.meta['title']`` set by ``parse``. Returns the
        populated item, or ``None`` (after logging a warning) when the
        overview/intro containers or the unit price cannot be located.
        Optional attributes whose label is absent from the page are stored
        as empty strings.
        """
        item = ErshoufangItem()
        item['title'] = str(response.meta['title'])

        # Parse the document once and reuse the tree for both sections.
        soup = BeautifulSoup(response.text, 'html.parser')
        overview = soup.find('div', class_='overview')
        content = (overview.find('div', class_='content')
                   if overview is not None else None)
        introContent = soup.find('div', class_='introContent')
        if content is None or introContent is None:
            self.logger.warning(
                'Unexpected detail page layout, skipping: %s', response.url)
            return None

        zongje = content.find('div', class_='price').find(
            'span', class_='total').text.strip()

        price_match = self._UNIT_PRICE_RE.search(str(content))
        if price_match is None:
            self.logger.warning(
                'Unit price not found, skipping: %s', response.url)
            return None
        danjia = float(price_match.group(1))

        house_info = content.find('div', class_='houseInfo')
        area_div = house_info.find('div', class_='area')
        chaoxiang = house_info.find('div', class_='type').find(
            'div', class_='mainInfo').text.strip()
        # The last two characters of the area text are dropped before the
        # float conversion (the numeric part is expected to precede them).
        mianji = float(area_div.find(
            'div', class_='mainInfo').text.strip()[:-2])
        # Only the first four characters of the sub-info text are kept.
        year = area_div.find('div', class_='subInfo').text.strip()[:4]

        aroundInfo = content.find('div', class_='aroundInfo')
        # Match on the 'info' class token. The previous selector 'info '
        # (trailing space) can only match if the parser preserves the
        # attribute's trailing whitespace.
        # NOTE(review): confirm against the bs4 version in use.
        xiaoqu = aroundInfo.find('div', class_='communityName').find(
            'a', class_='info').text.strip()

        areaName = aroundInfo.find('div', class_='areaName').find(
            'span', class_='info').find_all('a')
        dist = areaName[0].text.strip()
        quyu = sp.updateDist(dist)
        jiedao = areaName[1].text.strip()

        base = introContent.find('div', class_='base').find(
            'div', class_='content').find('ul').find_all('li')
        transaction = introContent.find('div', class_='transaction').find(
            'div', class_='content').find('ul').find_all('li')

        # Every optional field defaults to '' so that a page missing a label
        # still yields a complete item instead of raising UnboundLocalError.
        address = ''
        jtms = ''
        huxing = ''
        louceng = ''
        zhuangxiu = ''
        cqxz = ''
        zhuzhaileixing = ''
        laiyuan = '链家'

        for b in base:
            attr_match = self._BASE_ATTR_RE.search(str(b))
            if attr_match is None:
                # Row does not follow the "<span label>value" layout.
                continue
            label, value = attr_match.groups()
            if label == '房屋户型':      # house layout
                huxing = value
            elif label == '所在楼层':    # floor
                louceng = value
            elif label == '装修情况':    # decoration status
                zhuangxiu = value

        for t in transaction:
            spans = t.find_all('span')
            if len(spans) < 2:
                # Need both a label span and a value span.
                continue
            label1 = spans[0].text.strip()
            name1 = spans[1].text.strip()
            if label1 == '交易权属':      # transaction ownership type
                cqxz = name1
            elif label1 == '房屋用途':    # house usage
                zhuzhaileixing = name1

        item['zongje'] = zongje
        item['danjia'] = danjia
        item['year'] = year
        item['chaoxiang'] = chaoxiang
        item['mianji'] = mianji
        item['xiaoqu'] = xiaoqu
        item['quyu'] = quyu
        item['jtms'] = jtms
        item['address'] = address
        item['jiedao'] = jiedao
        item['huxing'] = huxing
        item['louceng'] = louceng
        item['zhuangxiu'] = zhuangxiu
        item['cqxz'] = cqxz
        item['zhuzhaileixing'] = zhuzhaileixing
        item['laiyuan'] = laiyuan

        return item
